NOTE: from level 4/5 onwards I will execute most steps locally, as the datasets are not as large anymore.
l Here, we will work on the level 5 of the CD4 T cell, which entails:
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/4-level_4/PC/PC_GC.rds"
path_to_mbc <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_3/NBC_MBC/NBC_MBC_clustered_level_3_with_pre_freeze.rds"
path_to_save <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_subseted_integrated_level_5.rds"
# Colors
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",
"#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32",
"black", "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2",
"#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",
"#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",
"Brown")
# Functions
source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/utils.R")
downsample_cells <- function(seurat, var, n_cells) {
seurat$barcode <- colnames(seurat)
proportions <- table(seurat[[var]]) / ncol(seurat)
cells_list <- purrr::map2(names(proportions), proportions, function(x, y) {
n <- n_cells * y
df <- seurat@meta.data[seurat@meta.data[[var]] == x, ]
cells <- df %>%
dplyr::sample_n(n, replace = FALSE) %>%
dplyr::pull(barcode)
cells
})
selected_cells <- unlist(cells_list)
seurat_sub <- subset(seurat, cells = selected_cells)
seurat_sub
}
# Load and subset class-switch memory B cells
mem_b <- readRDS(path_to_mbc)
DimPlot(mem_b)
FeaturePlot(mem_b, c("CD27", "IGHA1"))
mem_b <- subset(mem_b, idents = "1")
set.seed(123)
mem_b <- downsample_cells(mem_b, var = "gem_id", n_cells = 500)
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 37378 features across 15473 samples within 1 assay
## Active assay: RNA (37378 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
seurat <- FindSubCluster(
seurat,
cluster = "CD9",
graph.name = "RNA_snn",
subcluster.name = "cd9_subclusters",
resolution = 0.15
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1422
## Number of edges: 46517
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8708
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "cd9_subclusters", cols = color_palette)
p <- seurat@reductions$umap@cell.embeddings %>%
as.data.frame() %>%
mutate(cluster = seurat$cd9_subclusters) %>%
filter(str_detect(cluster, "CD9")) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point(size = 0.1) +
theme_classic()
p
seurat <- subset(seurat, cd9_subclusters != "CD9_2")
# DEA
Idents(seurat) <- "cd9_subclusters"
dea <- FindMarkers(
seurat,
ident.1 = "CD9_1",
ident.2 = "CD9_0",
logfc.threshold = 0.25,
only.pos = FALSE
)
dea <- dea %>%
rownames_to_column("gene") %>%
arrange(desc(avg_log2FC))
DT::datatable(dea)
# Visualize markers
markers_interest <- c("XBP1", "JCHAIN", "IGHM", "MIR155HG")
feat_plots <- purrr::map(markers_interest, function(x) {
p <- FeaturePlot(seurat, x) +
scale_color_viridis_c(option = "magma")
p
})
feat_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
seurat <- FindNeighbors(seurat, reduction = "harmony", dims = 1:30)
seurat <- FindSubCluster(
seurat,
cluster = "BCL2A1",
graph.name = "RNA_snn",
subcluster.name = "lz_subclusters",
resolution = 0.4
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7466
## Number of edges: 258681
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8067
## Number of communities: 8
## Elapsed time: 1 seconds
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)
p <- seurat@reductions$umap@cell.embeddings %>%
as.data.frame() %>%
mutate(cluster = seurat$lz_subclusters) %>%
filter(str_detect(cluster, "BCL2A1_2")) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point(size = 0.1) +
theme_classic()
p
As we can see, we will subset to cluster “BCL2A1_2”:
excluded_clusters <- c("BCL2A1_0", "BCL2A1_1", "BCL2A1_3", "BCL2A1_4",
"BCL2A1_5", "BCL2A1_6")
selected_cells <- colnames(seurat)[!(seurat$lz_subclusters %in% excluded_clusters)]
seurat <- subset(seurat, cells = selected_cells)
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)
seurat <- subset(
seurat,
cells = colnames(seurat)[Idents(seurat) != "Histone like"]
)
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)
mem_b$annotation_level_5 <- "class-switched MBC"
seurat$annotation_level_5 <- seurat$lz_subclusters
seurat_merged <- merge(x = seurat, y = mem_b)
seurat_list <- SplitObject(seurat_merged, split.by = "assay")
seurat_list <- seurat_list[c("3P", "multiome")]
seurat_list <- purrr::map(
seurat_list,
FindVariableFeatures,
nfeatures = 5000
)
hvg <- purrr::map(seurat_list, VariableFeatures)
shared_hvg <- intersect(hvg$`3P`, hvg$multiome)
# ElbowPlot(seurat_merged, reduction = "harmony")
seurat_merged <- seurat_merged %>%
ScaleData(features = shared_hvg) %>%
RunPCA(features = shared_hvg) %>%
RunHarmony(group.by.vars = "assay", reduction ="pca", dims = 1:12) %>%
RunUMAP(reduction = "harmony", dims = 1:12)
Idents(seurat_merged) <- "annotation_level_5"
DimPlot(seurat_merged, cols = color_palette)
DimPlot(seurat_merged, cols = color_palette, group.by = "assay")
As we can see, there is a subcluster in “GC Derived precursor 3” that is multiome-specific. Thus, we will exclude it:
seurat_merged <- FindNeighbors(seurat_merged, reduction = "harmony", dims = 1:12)
seurat_merged <- FindSubCluster(
seurat_merged,
cluster = "GC Derived precursor 3",
graph.name = "RNA_snn",
subcluster.name = "multiome_specific",
resolution = 0.1
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 889
## Number of edges: 23089
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9159
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat_merged, group.by = "multiome_specific", cols = color_palette)
seurat_merged <- subset(
seurat_merged,
multiome_specific != "GC Derived precursor 3_1"
)
DimPlot(seurat_merged, group.by = "multiome_specific", cols = color_palette)
# Reprocess
seurat_list <- SplitObject(seurat_merged, split.by = "assay")
seurat_list <- seurat_list[c("3P", "multiome")]
seurat_list <- purrr::map(
seurat_list,
FindVariableFeatures,
nfeatures = 5000
)
hvg <- purrr::map(seurat_list, VariableFeatures)
shared_hvg <- intersect(hvg$`3P`, hvg$multiome)
# ElbowPlot(seurat_merged, reduction = "harmony")
seurat_merged <- seurat_merged %>%
ScaleData(features = shared_hvg) %>%
RunPCA(features = shared_hvg) %>%
RunHarmony(group.by.vars = "assay", reduction ="pca", dims = 1:20) %>%
RunUMAP(reduction = "harmony", dims = 1:20)
DimPlot(seurat_merged, cols = color_palette)
input_shiny <- seurat2shiny(seurat_merged, slot = "data", reduction = "umap")
saveRDS(seurat_merged, path_to_save)
saveRDS(
input_shiny$expression,
"~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_expression_to_shiny_app_level_5.rds")
saveRDS(
input_shiny$metadata,
"~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_metadata_to_shiny_app_level_5.rds")